* ------------------------------------------------------------------
* Session setup: empty the session, close any open log, turn paging
* off, and fix the print colour scheme and matrix size.
* The two settings marked "permanently" are stored by Stata and stay
* in effect after this do-file has finished.
* ------------------------------------------------------------------
clear all
program drop _all
capture log close
set more off
set printcolor gs1, permanently
set matsize 11000, permanently

** Figure 4 

*************
** (a) Brands
*************
* Stack the two product-definition files for firm sample E.
* product_flag records which file each observation came from
* ("def10" is labelled "Barcode" and "defX" is labelled "Brand" in the legend below).
use "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear
generate product_flag = "def10"

* Rows appended from the defX file arrive with an empty product_flag.
append using "INTERMEDIATE/product_RMS_defX_firm_sampleE_06_15.dta"
replace product_flag = "defX" if product_flag == ""

* Outcomes in logs.
foreach v in price quantity revenue {
    generate log`v' = log(`v')
}

* One identifier per module-quarter cell; absorbed as a fixed effect below.
egen fe = group(module quarter)

* Remove the cohorts listed as entry spikes.
generate sample_entry_spikes = inlist(cohort, 21, 26, 29, 33, 37)
drop if sample_entry_spikes == 1

* cohort_d1, cohort_d2, ... : one dummy per cohort value still present.
* The suffix is the rank of the value among those present, not the cohort
* value itself (the spike cohorts have already been removed at this point).
quietly tabulate cohort, generate(cohort_d)

* Estimation sample: cohorts 3-23, ages 1-16, products observed beyond
* age 16, and a "Complete" longitudinal flag.
keep if inrange(cohort, 3, 23)
keep if age <= 16 & age != 0 & maxAge > 16 & flag_longitudinal == "Complete"

* Transformed cohort terms: each dummy k >= 5 is combined linearly with
* dummies 3 and 4.
* NOTE(review): presumably a normalisation of the cohort effects - confirm
* against the paper.
forvalues k = 5/23 {
    generate cohort_nd`k' = cohort_d`k' - ((`k' - 1) * cohort_d4 - (`k' - 2) * cohort_d3)
}
drop cohort_nd23

* Age profile of log revenue, estimated separately for each product definition.
* For definition d the loop leaves behind: coef<d>, se<d>, age<d>, coef_u<d>, coef_l<d>.
foreach pdef in 10 X {
    areg logrevenue i.age cohort_nd* if product_flag == "def`pdef'" & age <= 16, absorb(fe) cluster(module)

    * Column 1 = coefficients, column 2 = diagonal of e(V), i.e. variances.
    matrix results_bal_rev`pdef' = [e(b)', vecdiag(e(V))']
    svmat results_bal_rev`pdef'
    rename (results_bal_rev`pdef'1 results_bal_rev`pdef'2) (coef`pdef' se`pdef')

    * Row index used as the age axis; this assumes the i.age terms are the
    * first entries of e(b) - TODO confirm.
    generate age`pdef' = _n

    * se<d> holds a variance, hence the sqrt() in the 1.96 band.
    generate coef_u`pdef' = coef`pdef' + 1.96 * sqrt(se`pdef')
    generate coef_l`pdef' = coef`pdef' - 1.96 * sqrt(se`pdef')
}


* Figure 4(a): shaded 1.96-sd bands (plots 1-2) and point estimates (plots 3-4)
* for the two product definitions; only plots 3 and 4 appear in the legend.
twoway ///
    (rarea coef_l10 coef_u10 age10 if age10<=16&coef10 !=., color(ebg%40) pstyle(ci)) ///
    (rarea coef_lX coef_uX ageX if ageX<=16& coefX !=., color(erose%40) pstyle(ci)) ///
    (connected coef10 age10 if age10 <=16 & coef10 !=., color(midblue) lwidth(thick)) ///
    (connected coefX ageX if ageX<=16 & coefX !=., color(red) lwidth(thick) lpattern(longdash)) ///
    , ytitle("Estimated Sales (log scale)") ///
    xtitle("Age (quarters)") ///
    xlabel(4(4)16) ///
    title("") ///
    legend(row(1) order(3 "Barcode" 4 "Brand") ) ///
    graphregion(color(white)) ///
    plotregion(fcolor(white)) ///
    note("")
graph export "OUTPUT/Figure4a.eps", replace



*************
** (b) Novelty
*************

* Panel (b) starts from a clean session.
clear all
set more off
cap log close
program drop _all

* Load the def10 / sample E product panel.
* Uses the documented -clear- option of -use- (the original passed -replace-,
* which is not a documented option of -use-).
use "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear

* Log outcomes and their labels.
gen logprice = log(price)
label variable logprice "Price (log)"
gen logquantity = log(quantity)
label variable logquantity "Quantity (log)"
gen logrevenue  = log(revenue)
label variable logrevenue "Revenue (log)"
gen logrevenue_store      = log(revenue/num_store)
label variable logrevenue_store "Average revenue per store (log)"
gen logstore=log(num_store)
label variable logstore "Number of Stores (log)"

* Module-by-quarter cell identifier (absorbed fixed effect in the regression).
egen fe=group(module quarter)

* One dummy per cohort value present in the data (cohort_d1, cohort_d2, ...).
qui tab cohort, gen(cohort_d)

* sample_B16 = 1 for cohorts 3-23, products observed beyond age 16, a
* "Complete" longitudinal flag, and ages 1-16; missing otherwise.
gen sample_B16=1  if (cohort>=3 & cohort<=23)  & maxAge>16  & flag_longitudinal=="Complete" & age<=16 & age>0

* Kept on disk until the novelty measures have been built and merged back in.
save "INTERMEDIATE/temp3.dta", replace


* List of entering products from the sample A file.
* new = 1 when censored is "not censored" or "right censored", 0 otherwise.
use product year quarter qtr censored type ///
    using "INTERMEDIATE/product_RMS_def10_firm_sampleA_06_15.dta" ///
    if type=="entry" , clear
generate new = inlist(censored, "not censored", "right censored")
drop censored type
save "INTERMEDIATE/list.dta", replace


* Product characteristics file: drop unused fields, harmonise "no information"
* codes, and keep only the products that appear in the entry list.
use "INTERMEDIATE/products_clean_extras.dta", clear

* -capture- makes each drop all-or-nothing: if any listed variable is absent
* the command fails silently and nothing is dropped.
capture drop gr* taggr* upc1 status datemodified
capture drop size2_code size2_amount size2_units_desc size2_units_orig size2_amount_orig ///
    size2_units flavor_code form_code formula_code container_code salt_content_code style_code ///
    type_code product_code variety_code organic_claim_code usda_organic_seal_code ///
    common_consumer_name_code strength_code scent_code dosage_code gender_code ///
    target_skin_condition_code use_code size2_code size1_code_uc organic_claim_descr ///
    usda_organic_seal_descr name_firm size1_change_flag_uc multi scent_clean ///
    size1_units_orig size1_amount_orig size1_units dataset_found_uc

* Size is handled as a string characteristic from here on.
tostring size1_amount, generate(size1_amount_descr) usedisplayformat force
drop size1_amount

* Empty strings and the "no information" codes all become a single blank " ".
local descr_vars size1_amount_descr flavor_descr form_descr formula_descr container_descr ///
    salt_content_descr style_descr type_descr product_descr variety_descr ///
    common_consumer_name_descr strength_descr scent_descr dosage_descr gender_descr ///
    target_skin_condition_descr use_descr
foreach v of local descr_vars {
    replace `v' = " " if inlist(`v', "", ".", "NOT APPLICABLE", "NOT STATED", "N/A")
}

* Require a firm id and a module code, and keep UPC version 1 only.
drop if gcc == "" | product_module_code == .
keep if upc_ver_uc == 1
drop upc_ver_uc

* Keep only products found in the entry list (which brings in year, quarter, qtr, new).
rename upc product
merge 1:1 product using "INTERMEDIATE/list.dta", keep(match) nogenerate

* Shorter names for the two longest characteristic variables.
rename (common_consumer_name_descr target_skin_condition_descr) (common_consumer_descr target_skin_descr)
 
* Characteristics whose first appearance is tracked.
* size1_amount_descr is spelled out in full: size1_amount itself was dropped
* after -tostring-, so the shorter name only worked through variable
* abbreviation.
global all_variables brand_descr size1_amount_descr flavor_descr form_descr formula_descr ///
container_descr salt_content_descr style_descr type_descr product_descr variety_descr ///
common_consumer_descr strength_descr scent_descr dosage_descr gender_descr ///
target_skin_descr use_descr organic_clean generic gcc 

* For every characteristic build two 0/1 flags:
*   one_<var>  = 1 for products introduced in the first (year, qtr) in which
*                this value of <var> appears in the module;
*   onef_<var> = same, but "first" is relative to the firm (gcc) in the module.
*
* The first appearance is located with -bysort group (year qtr)-, which is
* deterministic. The earlier "sort ... year qtr" followed by "egen tag()" was
* not: tag() marks one arbitrary observation per group and re-sorts the data
* itself, and -sort- does not keep the previous order of tied observations.
foreach var in $all_variables {

    * Market level: earliest observation of each (module, value).
    bysort product_module_code `var' (year qtr): gen byte one_`var' = (_n == 1)
    * Missing or blank values never count as a new characteristic.
    replace one_`var' = 0 if missing(`var')
    capture replace one_`var' = 0 if `var' == " "

    * Firm level: earliest observation of each (firm, module, value).
    bysort gcc product_module_code `var' (year qtr): gen byte onef_`var' = (_n == 1)
    replace onef_`var' = 0 if missing(`var')
    capture replace onef_`var' = 0 if `var' == " "

    * Extend the market-level flag to every product introduced in the same
    * quarter with the same value.
    tempvar spread
    bysort year qtr product_module_code `var': egen `spread' = max(one_`var')
    replace one_`var' = `spread'
    drop `spread'

    * Extend the firm-level flag within the firm only. Without gcc in the
    * by-list one firm's first use would also be credited to other firms'
    * products in that quarter.
    tempvar spreadf
    bysort gcc year qtr product_module_code `var': egen `spreadf' = max(onef_`var')
    replace onef_`var' = `spreadf'
    drop `spreadf'
}

* Number of new characteristics per product, market-wide and within firm.
egen total_new=rowtotal(one_*)
egen total_new_firm=rowtotal(onef_*)
save "INTERMEDIATE/temp.dta", replace 



* Process one product module at a time; each module's result is written to
* INTERMEDIATE/temp_<module>.dta.
use "INTERMEDIATE/temp.dta", clear
levelsof product_module_code, local(modules)
foreach l of local modules {

    * -clear- is the documented option of -use- (the original passed -replace-).
    use "INTERMEDIATE/temp.dta" if product_module_code==`l', clear
    macro drop relevant relevantfirm
    gen total_characteristics=.
    gen total_combinations=.

    * A characteristic is "relevant" in this module when at least one product
    * is flagged as new on it (one_<var> is not identically zero).
    local i=0
    global relevant
    foreach var in $all_variables {
        qui su one_`var'
        if r(mean)!=0 {
            local i = `i' + 1
            global relevant $relevant `var'
        }
    }
    qui replace total_characteristics=`i'

    * Same count using the firm-level flags.
    gen total_characteristicsfirm=.
    local i=0
    global relevantfirm gcc
    foreach var in $all_variables {
        qui su onef_`var'
        if r(mean)!=0 {
            local i = `i' + 1
            global relevantfirm $relevantfirm `var'
        }
    }
    qui replace total_characteristicsfirm=`i'

    * Each distinct bundle of relevant characteristics gets one id.
    di "$relevant"
    egen combinations=group($relevant)
    su combinations
    replace total_combinations=r(max)

    * Flag the earliest (year, qtr) observation of every bundle. This is done
    * with -bysort ... (year qtr)- because "sort" followed by "egen tag()" does
    * not guarantee that the tagged observation is the earliest one.
    * Observations with no bundle id (missing combinations) are never flagged.
    bysort combinations (year qtr): gen byte tag = (_n == 1) & !missing(combinations)

    * Every product introduced in that same quarter with that bundle is new.
    bysort year qtr combinations: egen total_new_combined=max(tag)

    keep product_module_code year qtr product total_characteristicsfirm total_new_firm total_new total_characteristics total_combinations total_new_combined new
    save "INTERMEDIATE/temp_`l'.dta", replace
}


* Stack the per-module files into one product-level novelty file.

* Module codes come from temp.dta.
use "INTERMEDIATE/temp.dta", clear
levelsof product_module_code, local(modules)

* Start from an empty dataset (also written to disk) and append every module.
clear
save "INTERMEDIATE/newness_combinations.dta", replace emptyok
foreach l of local modules {
    append using "INTERMEDIATE/temp_`l'.dta"
}

* Keep products flagged new in the entry list; timing variables are no longer needed.
keep if new == 1
drop new year qtr
format product %14.0g 

* Share of relevant characteristics that are new, market-wide and within firm.
generate newness_index      = total_new / total_characteristics
generate newness_index_firm = total_new_firm / total_characteristicsfirm

label variable total_new                 "Total new characteristics"
label variable total_characteristics     "Total relevant characteristics in module"
label variable total_new_combined        "=1 if this product is a new combinations or characteristics"
label variable total_new_firm            "Total new characteristics for the firm"
label variable total_characteristicsfirm "Total relevant characteristics in module for the firm"
label variable newness_index             "Newness index: total_new/total_characteristics"
label variable newness_index_firm        "Newness index firm: total_new_firm/total_characteristicsfirm"
label variable total_combinations        "Total possible relevant combinations"

saveold "INTERMEDIATE/newness_combinations.dta", replace 

* Product-level novelty groups, one row per product after -duplicates drop-.
* -clear- is the documented option of -use- (the original passed -replace-).
use product module using "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear
merge m:1 product using "INTERMEDIATE/newness_combinations.dta", keepusing(newness_index total_new_combined total_characteristics total_new)

* Index that counts a new combination as one extra characteristic.
gen newness_index_combined=(total_new + total_new_combined)/(total_characteristics + 1)
duplicates drop

* Within-module quartile of the index among products with a positive index.
* The quartile is computed once and reused for qnew (the original ran the
* identical xtile twice).
bys module: egen qnew_b = xtile(newness_index) if newness_index>0 & newness_index!=., nq(4)

* qnew: 0 = zero index, 1-4 = quartile, missing = no index.
gen qnew = qnew_b
replace qnew=0 if newness_index==0
replace qnew=. if newness_index==.

* supernew_b: top quartile of the index; supernew_b2: any positive index.
* Both are missing when the index is missing.
gen supernew_b=(qnew_b==4)
replace supernew_b=. if newness_index==. 
gen supernew_b2=(newness_index>0 & newness_index!=.)
replace supernew_b2=. if newness_index==. 

* Same two flags based on the combined index.
bys module: egen qnew_c = xtile(newness_index_combined) if newness_index_combined>0 &newness_index_combined!=., nq(4)
gen supernew_c=(qnew_c==4)
replace supernew_c=. if newness_index_combined==. 
* The non-missing test now refers to the combined index itself. The original
* tested newness_index here, so a product with a positive combined index but
* a missing plain index was coded 0.
gen supernew_c2=(newness_index_combined>0 & newness_index_combined!=.)
replace supernew_c2=. if newness_index_combined==. 
save "INTERMEDIATE/newness_combined.dta", replace

* Attach the novelty groups to the product panel saved earlier.
use "INTERMEDIATE/temp3.dta", clear
merge m:1 product using "INTERMEDIATE/newness_combined.dta", nogenerate

* Products without a novelty measure get sentinel codes (9 for qnew, 99 for
* the flags), so that "flag <= 1" later selects only classified products.
replace qnew = 9 if qnew == .
foreach flag in supernew_b supernew_b2 supernew_c supernew_c2 {
    capture replace `flag' = 99 if `flag' == .
}

* Transformed cohort terms: dummy k combined linearly with dummies 3 and 4.
forvalues k = 5/23 {
    generate cohort_nd`k' = cohort_d`k' - ((`k' - 1) * cohort_d4 - (`k' - 2) * cohort_d3)
}

* This label is reused as the y-axis title of the figure.
label variable logrevenue "Estimated Sales (log scale)"

* Figure 4(b): age profile of log revenue by novelty group (supernew_b = 0/1)
* on the sample_B16 sample.
local sample sample_B16
local var    logrevenue
local varX   supernew_b
local v1 : variable label `var'

preserve

* Drop products without a novelty classification (sentinel code 99).
keep if `varX' <= 1

* NOTE(review): this panel clusters on fe (module x quarter), whereas panels
* (a), (c) and (d) cluster on module - confirm this is intended.
areg `var' i.age#i.`varX' cohort_nd* if `sample' == 1, absorb(fe) cluster(fe)

* Column 1 = coefficients, column 2 = variances (diagonal of e(V)).
matrix results = [e(b)', vecdiag(e(V))']
svmat results
rename (results1 results2) (coef se)
keep coef se

* Label the first 32 rows as (age 1..16) x (group 1..2). This assumes the
* age#group interactions are the first 32 entries of e(b), with the group
* index varying fastest - TODO confirm.
seq quarter, f(1) t(2)
seq age, f(1) t(16) b(2)
keep if _n <= 32

* One row per age; columns coef1/se1 and coef2/se2 for the two groups.
reshape wide coef se, i(age) j(quarter)
sort age

forvalues g = 1/2 {
    * Bands around the raw coefficients (se holds a variance, hence sqrt).
    generate coef_`g'_u = coef`g' + 1.96 * sqrt(se`g')
    generate coef_`g'_l = coef`g' - 1.96 * sqrt(se`g')

    * Profile normalised to zero at age 1.
    * NOTE(review): the band reuses the variance of the un-normalised coefficient.
    generate norm_`g' = coef`g'[1]
    generate coef_norm_`g' = coef`g' - norm_`g'
    generate coef_norm_`g'_u = coef_norm_`g' + 1.96 * sqrt(se`g')
    generate coef_norm_`g'_l = coef_norm_`g' - 1.96 * sqrt(se`g')
}

* Plots 1-2 are the bands, plots 3-4 the lines; group 2 is labelled
* "High-novelty" and group 1 "Other".
twoway ///
    (rarea coef_norm_2_u coef_norm_2_l age if age<=16, color(ebg%40) pstyle(ci)) ///
    (rarea coef_norm_1_u coef_norm_1_l age if age<=16, color(erose%40) pstyle(ci)) ///
    (connected coef_norm_2 age if age<=16,lpattern(longdash) color(midblue) lwidth(thick)) ///
    (connected coef_norm_1 age if age<=16,lpattern(solid) color(red) lwidth(thick)) ///
    , ytitle("`v1'" "") ///
    xtitle(Age in quarters) ///
    xlabel(4[4]16) ///
    title("") ///
    leg(on row(1) order(3 "High-novelty" 4 "Other")) ///
    graphregion(color(white)) ///
    plotregion(fcolor(white)) ///
    note(" ")
graph export "OUTPUT/Figure4b.eps", replace

restore

* Remove the scratch files written while building panel (b).
erase  "INTERMEDIATE/temp.dta" 
erase  "INTERMEDIATE/temp3.dta" 
erase "INTERMEDIATE/list.dta" 

* Also remove the per-module scratch files INTERMEDIATE/temp_<module>.dta,
* which were previously left on disk. The local `modules' still holds the
* module codes collected by -levelsof- above; -capture- tolerates a file that
* is already gone.
foreach l of local modules {
    capture erase "INTERMEDIATE/temp_`l'.dta"
}

*************
** (c) Superstar
*************

* Panel (c): product panel with log outcomes, fixed-effect cell id, cohort
* dummies and sample indicators.
use "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear

generate logprice         = log(price)
generate logquantity      = log(quantity)
generate logrevenue       = log(revenue)
generate logrevenue_store = log(revenue/num_store)
generate logstore         = log(num_store)

label variable logprice         "Price (log)"
label variable logquantity      "Quantity (log)"
label variable logrevenue       "Revenue (log)"
label variable logrevenue_store "Average revenue per store (log)"
label variable logstore         "Number of Stores (log)"

* Module-by-quarter cell identifier and one dummy per cohort value.
egen fe = group(module quarter)
quietly tabulate cohort, generate(cohort_d)

* Sample indicators (1 or missing). All three require a "Complete"
* longitudinal flag and ages 1-16; they differ in the cohort window and in
* the condition on maxAge.
local common `"flag_longitudinal=="Complete" & age<=16 & age>0"'
generate sample_B16  = 1 if (cohort>=3 & cohort<=23) & maxAge>16  & `common'
generate sample_SB16 = 1 if (cohort>=3 & cohort<=23) & age<maxAge & `common'
generate sample_U    = 1 if (cohort>=3 & cohort<=36) & age<maxAge & `common'

save "INTERMEDIATE/temp.dta", replace

* Superstar flags: top decile of revenue summed over ages 1-4, ranked within
* four alternative reference groups.
* -clear- is the documented option of -use- (the original passed -replace-).
use "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear
drop if cohort==1 | cohort==2
keep if cohort<=36 // 23?
keep if inlist(age, 1, 2, 3, 4)
keep if maxAge>4

* One row per product: revenue_r summed over ages 1-4.
collapse (sum) revenue_r, by(product module cohort maxAge)

* Groups of four consecutive cohorts (cohorts 3-6 -> 1, 7-10 -> 2, ...).
gen cohort2=int((cohort-2.9)/4)+1

* Deciles are only computed in reference groups with at least 10 products.
* Where the decile is missing the superstar flag is 0.

* (1) within module x cohort group
bys module cohort2: egen count_mc = count(revenue_r) 
bys module cohort2: egen tile_mc = xtile(revenue_r) if count_mc>=10, nq(10)
gen superstar_mc=(tile_mc==10) 

* (2) within module
bys module: egen count_m = count(revenue_r)
bys module: egen tile_m = xtile(revenue_r) if count_m>=10, nq(10)
gen superstar_m=(tile_m==10)  

* (3) within cohort group. The decile is now ranked within cohort2 only,
* matching count_c; the original ranked within module x cohort2, which
* duplicated tile_mc apart from the sample-size filter.
bys cohort2: egen count_c = count(revenue_r) 
bys cohort2: egen tile_c = xtile(revenue_r) if count_c>=10, nq(10)
gen superstar_c=(tile_c==10) 

* (4) over all products
egen count = count(revenue_r)
egen tile = xtile(revenue_r) if count>=10, nq(10)
gen superstar=(tile==10)  

* revenue_r is not kept, so the earlier rename of it has been removed.
keep product tile_mc tile_m superstar_mc superstar_m tile_c tile superstar_c superstar
save "INTERMEDIATE/superstar.dta", replace

* Attach the superstar classification to the product panel.
use "INTERMEDIATE/temp.dta", clear
merge m:1 product using "INTERMEDIATE/superstar.dta", nogenerate

* Unclassified products get the sentinel code 99, so that "flag <= 1" later
* selects only classified products.
foreach v in tile_mc tile_m superstar_mc superstar_m {
    replace `v' = 99 if `v' == .
}

* Transformed cohort terms: dummy k combined linearly with dummies 3 and 4.
forvalues k = 5/23 {
    generate cohort_nd`k' = cohort_d`k' - ((`k' - 1) * cohort_d4 - (`k' - 2) * cohort_d3)
}

* Figure 4(c): age profile of log revenue by superstar status
* (superstar_mc = 0/1) on the sample_B16 sample.

* This label is reused as the y-axis title.
label variable logrevenue "Estimated Sales (log scale)"

local sample sample_B16
local var    logrevenue
local varX   superstar_mc
local v1 : variable label `var'

preserve

* Drop products without a classification (sentinel code 99).
keep if `varX' <= 1

areg `var' i.age#i.`varX' cohort_nd* if `sample' == 1, absorb(fe) cluster(module)

* Column 1 = coefficients, column 2 = variances (diagonal of e(V)).
matrix results = [e(b)', vecdiag(e(V))']
svmat results
rename (results1 results2) (coef se)
keep coef se

* Label the first 32 rows as (age 1..16) x (group 1..2). This assumes the
* age#group interactions are the first 32 entries of e(b), with the group
* index varying fastest - TODO confirm.
seq quarter, f(1) t(2)
seq age, f(1) t(16) b(2)
keep if _n <= 32

* One row per age; columns coef1/se1 and coef2/se2 for the two groups.
reshape wide coef se, i(age) j(quarter)
sort age

forvalues g = 1/2 {
    * Bands around the raw coefficients (se holds a variance, hence sqrt).
    generate coef_`g'_u = coef`g' + 1.96 * sqrt(se`g')
    generate coef_`g'_l = coef`g' - 1.96 * sqrt(se`g')

    * Profile normalised to zero at age 1.
    * NOTE(review): the band reuses the variance of the un-normalised coefficient.
    generate norm_`g' = coef`g'[1]
    generate coef_norm_`g' = coef`g' - norm_`g'
    generate coef_norm_`g'_u = coef_norm_`g' + 1.96 * sqrt(se`g')
    generate coef_norm_`g'_l = coef_norm_`g' - 1.96 * sqrt(se`g')
}

* Plots 1-2 are the bands, plots 3-4 the lines; group 2 is labelled
* "High-sales" and group 1 "Other".
twoway ///
    (rarea coef_norm_2_u coef_norm_2_l age if age<=16, color(ebg%40) pstyle(ci)) ///
    (rarea coef_norm_1_u coef_norm_1_l age if age<=16, color(erose%40) pstyle(ci)) ///
    (connected coef_norm_2 age if age<=16,color(midblue) lwidth(thick) lpattern(longdash)) ///
    (connected coef_norm_1 age if age<=16,color(red) lwidth(thick)) ///
    , ytitle("`v1'" " ") ///
    xtitle("Age (quarters)") ///
    xlabel(4[4]16) ///
    title("") ///
    leg(on row(1) order(3 "High-sales" 4 "Other")) ///
    graphregion(color(white)) ///
    plotregion(fcolor(white)) ///
    note(" ")
graph export "OUTPUT/Figure4c.eps", replace

restore

* Remove the scratch files written for panel (c).
foreach scratch in temp superstar {
    erase "INTERMEDIATE/`scratch'.dta"
}


*************
** (d) Durability
*************

* Split modules into four equal-sized groups by the rank of their average
* number of trips per year; group_trip takes the values 0-3.
use "INPUT/modcode_trip_frequency.dta", clear

* -unique- breaks ties, so every module gets a distinct rank.
egen rank = rank(average_number_of_trip_per_year), unique
egen group_trip = cut(rank), group(4)
label variable group_trip "0 Durable 3 Non-Durable"

* Keep only the merge key (renamed to match the product panel) and the group.
rename modcode module
keep module group_trip
save "INTERMEDIATE/temp2.dta", replace

* Panel (d): product panel restricted to modules that have a trip-frequency group.
use "INTERMEDIATE/product_RMS_def10_firm_sampleE_06_15.dta", clear
merge m:1 module using "INTERMEDIATE/temp2.dta", keep(match) nogenerate

* Only creates cohort when the file does not already carry one; otherwise the
* command fails and -capture- swallows the error.
capture bysort product: egen cohort = min(quarter)

* Outcomes in logs.
foreach v in price quantity revenue {
    generate log`v' = log(`v')
}

* Module-by-quarter cell identifier and one dummy per cohort value.
egen fe = group(module quarter)
quietly tabulate cohort, generate(cohort_d)

* Estimation sample: cohorts 3-23, ages 1-16, products observed beyond
* age 16, and a "Complete" longitudinal flag.
keep if inrange(cohort, 3, 23)
keep if age <= 16 & age != 0 & maxAge > 16 & flag_longitudinal == "Complete"

* Transformed cohort terms: dummy k combined linearly with dummies 3 and 4.
forvalues k = 5/23 {
    generate cohort_nd`k' = cohort_d`k' - ((`k' - 1) * cohort_d4 - (`k' - 2) * cohort_d3)
}

save "INTERMEDIATE/temp.dta", replace


* Age profile of log revenue for group_trip = 3 against all other modules.
use "INTERMEDIATE/temp.dta", clear

* Merge groups 0, 1 and 2 into a single category (coded 2); group 3 is unchanged.
replace group_trip = 2 if group_trip < 2

areg logrevenue i.age#i.group_trip cohort_nd*   if age<=16, absorb(fe) cluster(module)

* Column 1 = coefficients, column 2 = variances (diagonal of e(V)).
matrix results = [e(b)', vecdiag(e(V))']
svmat results
rename (results1 results2) (coef se)
keep coef se

* Label the first 32 rows as (age 1..16) x (group 1..2). This assumes the
* age#group interactions are the first 32 entries of e(b), with the group
* index varying fastest - TODO confirm.
seq quarter, f(1) t(2)
seq age, f(1) t(16) b(2)
keep if _n <= 32

* One row per age; columns coef1/se1 and coef2/se2 for the two groups.
reshape wide coef se, i(age) j(quarter)

forvalues g = 1/2 {
    * Bands around the raw coefficients (se holds a variance, hence sqrt).
    generate coef_`g'_u = coef`g' + 1.96 * sqrt(se`g')
    generate coef_`g'_l = coef`g' - 1.96 * sqrt(se`g')

    * Point estimates normalised to zero in the first row.
    generate norm_`g' = coef`g'[1]
    generate coef_norm_`g' = coef`g' - norm_`g'

    * Each bound is normalised by its own first-row value, so the plotted
    * band has zero width in the first row.
    * NOTE(review): panels (b) and (c) instead add +/- 1.96*sqrt(se) around
    * the normalised coefficient - confirm which construction is intended.
    generate norm_`g'_u = coef_`g'_u[1]
    generate coef_norm_`g'_u = coef_`g'_u - norm_`g'_u
    generate norm_`g'_l = coef_`g'_l[1]
    generate coef_norm_`g'_l = coef_`g'_l - norm_`g'_l
}

* Figure 4(d): plots 1-2 are the bands, plots 3-4 the lines; group 2 is
* labelled "Non-durable" and group 1 "Other".
twoway ///
    (rarea coef_norm_2_u coef_norm_2_l age if age<=16, color(ebg%40) pstyle(ci)) ///
    (rarea coef_norm_1_u coef_norm_1_l age if age<=16, color(erose%40) pstyle(ci)) ///
    (connected coef_norm_2 age if age<=16, lwidth(thick) lpattern(longdash) mcolor(midblue) lcolor(midblue)) ///
    (connected coef_norm_1 age if age<=16, lwidth(thick) mcolor(red) lcolor(red)) ///
    , ytitle("Revenue (log)" "") ///
    xtitle(Age in quarters) ///
    xlabel(4[4]16) ///
    title("") ///
    leg(on row(1) order(3 "Non-durable" 4 "Other")) ///
    graphregion(color(white)) ///
    plotregion(fcolor(white))
graph export "OUTPUT/Figure4d.eps", replace

* Remove the scratch files written for panel (d).
foreach scratch in temp temp2 {
    erase "INTERMEDIATE/`scratch'.dta"
}






